require(data.table)
library("dplyr")
library(ggplot2)
##############################
############################  Prepare intermediate data for Fig.3-4
##############################

#change to full local path to the replication data
wd1 <- "C:\\Users\\YFan\\...\\NFood_replication\\"


#====use all grids including crop failures in yield,yield0,yield1,yield2 to calc CLM modeled change in global yield
#load climate and yield change data for MLR analysis (from Data2.Input-for-MLR-analysis.R)
dif.8crop <- readRDS(file = paste0(wd1,"/data/RData/allclimate-yield-changedata-MLR.Rds")) #all grid samples including those yield=0 (set to 5th percentile of each crop)
dif.8crop[,length(unique(Sample)), by=.(Scenario)] #same sample size
dif.8crop[Year==2090,sum(area.wt), by=.(Scenario,Year)]

#global area weighted mean of grid-level change (CLM5 modeled)
dif.8crop.wm <- rbind(dif.8crop[Scenario=="RCP45 - RCP85",
                                .(Y.mod.dif=weighted.mean(yield.dif, area.wt, na.rm = T), #==RCP45_SSP5LU - RCP85_45CO2
                                  Y.mod.dif2=weighted.mean(yield.dif+co2.log, area.wt, na.rm = T), #==RCP45_SSP5LU - RCP85
                                  Y.mod.dif3=weighted.mean(yield.dif+co2.log+luc.log, area.wt, na.rm = T), #==RCP45 - RCP85
                                  yield1=weighted.mean(yield1, area.wt, na.rm=T), yield0=weighted.mean(yield0, area.wt, na.rm=T), yield=weighted.mean(yield, area.wt, na.rm=T),
                                  co2.log=weighted.mean(co2.log, area.wt, na.rm=T), luc.log=weighted.mean(luc.log, area.wt, na.rm = T),
                                  area=sum(area.wt, na.rm=T)), #by=.(Scenario,Year) ]
                                by=.(Scenario,Crop,Irrig,Year)],
                      dif.8crop[Scenario!="RCP45 - RCP85",
                                .(Y.mod.dif=weighted.mean(yield.dif, area.wt, na.rm = T),
                                  yield0=weighted.mean(yield0, area.wt, na.rm=T), yield=weighted.mean(yield, area.wt, na.rm=T), luc.log=0, co2.log=0,
                                  area=sum(area.wt, na.rm=T)), #by=.(Scenario,Year) ]
                                by=.(Scenario,Crop,Irrig,Year) ], fill=TRUE)
#above dif.8crop was masked by area>1000, and includes crop failures (yield=0 set to 5th percentile)


#==read global data to calc modeled total effects when considering LUC effects
#load data prepared by Data1.GrowingSeasonClimate-Yield.R
load(paste0(wd1,"/data/RData/GridSample-SSP2-SSP5-Data-final.Rdata")) 
df.ssp2.all[Year==2090,sum(area),by=.(Scenario,Year)] #this is real yield and area data for RCP45, based on its own area mask (area>1000ha)
df.ssp5.all[Year==2090,sum(area),by=.(Scenario,Year)]
df.ssp5.all[Year==2090,summary(yield),by=.(Scenario,Year)]
df.ssp2.all[Year==2090,summary(yield),by=.(Scenario,Year)] 
length(unique(df.ssp2.all$Sample)); length(unique(df.ssp5.all$Sample))
#=do not apply yield mask, use all samples including crop failure (yield=0) to calc luc.eff and tot effect on global crop production
df.lu.ssp2 <- df.ssp2.all[Year>=2020 & Year !=2100]
df.lu.ssp5 <- df.ssp5.all[Year>=2020 & Year !=2100] #ssp5 has same number of samples as dif.8crop
length(unique(df.lu.ssp2$Sample)); length(unique(df.lu.ssp5$Sample)) #SSP2 has much more samples, more dispersed crop areas than SSP5

#===add weight average yield per SSP2 and SSP5 to 6 crops or global and then differencing them to get total luc.eff
#(SSP2 has much more crop grid cells than SSP5, thus using luc.eff from dif.8crop.s will underestimate the total luc effect per crop)
#the sample size here is different from dif.8crop.s due to LUC, so have to aggregate to global first and then merge with climate effects: effect.wm
lu.ssp2.wm <- df.lu.ssp2[, .(yield2=weighted.mean(yield, area, na.rm=T), #fertn2=weighted.mean(fertn, area,na.rm=T),
                             area2=sum(area, na.rm=T)), by=.(Scenario,Crop,Irrig,Year)]
lu.ssp2.wm[Scenario=="SSP2",Scenario:="RCP45 - RCP85"]
lu.ssp2.wm[Year==2090,sum(area2), by=.(Scenario,Year)]

#replace the crop area and base yield data of dif.8crop with global data for calculating Y.mod.dif3 (clm modeled pct change of global average yield from RCP85 to RCP45 including LUC)
dif.8crop.wm <-dif.8crop.wm[lu.ssp2.wm, yield2:=i.yield2, on=.(Scenario,Crop,Irrig,Year)] #yield of RCP45(SSP2)
dif.8crop.wm <-dif.8crop.wm[lu.ssp2.wm, area2:=i.area2, on=.(Scenario,Crop,Irrig,Year)] #area of RCP45(SSP2)
dif.8crop.wm[Year==2090,sum(area), by=.(Scenario,Year)]
dif.8crop.wm[Year==2090,sum(area2), by=.(Scenario,Year)]
#now dif.8crop.wm has all crop grid samples (area>1000) including those with crop failures (set to 5th percentile)

save(list=c("lu.ssp2.wm", "dif.8crop.wm"), file = paste0(wd1,"/data/RData/Fig3-4.globalmean-yieldchanges.Rdata"))
rm(dif.8crop, df.lu.ssp2,df.lu.ssp5)






